This report covers user interactions with Wikimedia Maps across Wikimedia projects, as recorded by the Schema:Kartographer event logging implemented by Julien Girault (see T163139 for more details). It is a companion to the report on Wikimedia Maps usage on Wikimedia projects.

Data

-- Kartographer events logged from 2017-05-14 through 2017-09-11 (inclusive).
-- One row per event; event_extra is passed through unparsed as extras.
SELECT
  DATE(timestamp) AS `date`,
  timestamp AS ts,
  wiki,
  event_feature AS feature,
  event_action AS `event`,
  event_extra AS extras,
  event_duration AS duration,
  event_firstInteraction AS first_interaction,
  event_fullscreen AS fullscreen,
  event_mobile AS mobile,
  event_userToken AS user_token
FROM Kartographer_16132745
WHERE
  -- timestamp holds YYYYMMDDHHMMSS values. Filtering the raw column with a
  -- half-open range selects the same days as comparing its first 8 characters,
  -- without calling LEFT() on every row, and lets the server use an index on
  -- the column if one exists.
  timestamp >= '20170514000000'
  AND timestamp < '20170912000000'
# Run the query against the "log" database and coerce the raw columns:
# the three flag columns are compared against 1 to become logicals, and the
# date / timestamp columns are parsed with lubridate.
results <- dplyr::mutate(
  wmf::mysql_read(query, "log", con),
  first_interaction = first_interaction == 1,
  fullscreen = fullscreen == 1,
  mobile = mobile == 1,
  date = lubridate::ymd(date),
  ts = lubridate::ymd_hms(ts)
)
# Replace the raw JSON `extras` column with one column per JSON field:
# each event's JSON string is parsed into a one-row data frame, the rows are
# stacked, and the result is bound next to the core event columns.
results <- cbind(
  dplyr::select(
    results,
    wiki, user_token, ts, date, feature, event,
    duration, first_interaction, fullscreen, mobile
  ),
  purrr::map_df(
    results$extras,
    function(extras_json) {
      extras <- jsonlite::fromJSON(extras_json, simplifyVector = FALSE)
      as.data.frame(extras, stringsAsFactors = FALSE)
    }
  )
)
# Give the German layer names English labels; both spellings of the traffic
# line network layer end up under the same two-line label.
results$layer <- forcats::fct_recode(
  results$layer,
  "Boundaries" = "Grenzen",
  "Cycling" = "Radwege",
  "Hiking" = "Wanderwege",
  "Hill shading" = "Schummerung",
  "Relief map" = "Reliefkarte",
  "Traffic line\nnetwork" = "Verkehrsliniennetz",
  "Traffic line\nnetwork" = "Traffic line network"
)
# Map the logged service identifiers to display names (listed alphabetically
# by display name).
results$service <- forcats::fct_recode(
  results$service,
  "ACME Mapper" = "acme-mapper",
  "Apple Maps" = "apple-maps",
  "Arctic.io" = "arctic-io",
  "Bing Maps" = "bing-maps",
  "Blue Marble Navigator" = "blue-marble-navigator",
  "GeoHack" = "geohack",
  "GeoNames" = "geonames",
  "Google Maps" = "google-maps",
  "HERE" = "here",
  "MapQuest" = "mapquest",
  "OpenStreetMap" = "openstreetmap",
  "SkyVector" = "skyvector",
  "Waze" = "waze",
  "WikiMiniAtlas" = "wikiminiatlas",
  "Wikimapia" = "wikimapia",
  "Yandex.Maps" = "yandex-maps"
)
# Capitalize the first letter of every `type` value while leaving missing
# values as NA. Only the non-missing entries are handed to the helper, so the
# result does not depend on how the helper renders NA (no "NANA" placeholder
# has to be turned back into NA) and a genuine value can never be mistaken for
# such a placeholder.
has_type <- !is.na(results$type)
results$type[has_type] <- polloi::capitalize_first_letter(results$type[has_type])
# Order the events by wiki, then user token, then timestamp.
results <- results[with(results, order(wiki, user_token, ts)), ]

# Look up each wiki's language and project, then store the joined events as a
# data.table keyed on (wiki, user_token).
lang_proj <- polloi::get_langproj() # polloi::update_prefixes()
events <- data.table::data.table(
  dplyr::left_join(results, lang_proj, by = c("wiki" = "wikiid")),
  key = c("wiki", "user_token")
)

# The intermediate objects are no longer needed.
rm(results, lang_proj)
# Per-feature, per-wiki session summaries.
# Step 1: one row per (feature, language, project, date, mobile, user_token),
#         i.e. one row per session as counted in this table.
seshs <- events[, list(events = .N),
                by = c("feature", "language", "project", "date", "mobile", "user_token")] %>%
  # Step 2: daily session count, event count and share of mobile sessions.
  .[, list(
    sessions = .N,
    events = sum(events),
    mobile_prop = mean(mobile)
  ), by = c("feature", "language", "project", "date")] %>%
  # Step 3: collapse the days into a total and per-day medians.
  .[, list(
    total = sum(sessions),
    `sessions/day` = as.double(median(sessions)),
    `% of sessions on mobile` = median(mobile_prop),
    `% of sessions on desktop` = 1 - median(mobile_prop)
  ), by = c("feature", "language", "project")] %>%
  # Step 4: each wiki's share of its feature's sessions. The grouping is only
  # needed for this mutate, so it is dropped again right away, and `feature` is
  # selected explicitly instead of relying on dplyr re-adding the grouping
  # variable. The resulting column order is unchanged.
  dplyr::group_by(feature) %>%
  dplyr::mutate(`% of total sessions` = total / sum(total)) %>%
  dplyr::ungroup() %>%
  dplyr::select(
    feature, language, project,
    `% of total sessions`, `sessions/day`,
    `% of sessions on mobile`, `% of sessions on desktop`
  )
# Interactive table of the per-feature summaries with copy/CSV export buttons,
# initially sorted by column index 4 in descending order.
seshs_table <- DT::datatable(
  seshs,
  caption = paste("Per-feature summaries from the past", params$days, "days."),
  filter = "top",
  extensions = "Buttons",
  options = list(
    pageLength = 10,
    autoWidth = TRUE,
    language = list(search = "Filter:"),
    order = list(list(4, "desc")),
    dom = "Bfrtip",
    buttons = c("copy", "csv")
  )
)

# Proportions are shown as percentages with one decimal; the daily session
# count is shown as a plain number with no currency symbol and no decimals.
seshs_table %>%
  DT::formatPercentage(
    columns = c("% of sessions on mobile", "% of sessions on desktop", "% of total sessions"),
    digits = 1
  ) %>%
  DT::formatCurrency(columns = "sessions/day", currency = "", digits = 0)

Interactions

# For every (date, user_token) pair that logged mapframe events, record whether
# an "activate" event occurred, then turn that into a daily percentage.
mapframe_sessions <- events[
  feature == "mapframe",
  list(activated = any(event == "activate")),
  by = c("date", "user_token")
]
activation_rate <- mapframe_sessions[
  , list(activated = 100 * sum(activated) / .N),
  by = c("date")
]

# JavaScript formatter shared by the y-axis labels and the hover values:
# rounds to two decimals and appends a percent sign.
percent_js <- "function(x) { return (Math.round(x*100)/100) + '%'; }"

# Time series of everything but the date column, indexed by date.
xts::xts(activation_rate[, -1], order.by = activation_rate$date) %>%
  dygraphs::dygraph(
    main = "Proportion of sessions that have activated a mapframe",
    xlab = "Date",
    ylab = "% of sessions",
    width = 900
  ) %>%
  dygraphs::dyOptions(drawPoints = TRUE, pointSize = 2) %>%
  dygraphs::dyAxis("x", axisLabelFormatter = polloi::custom_axis_formatter) %>%
  dygraphs::dyAxis("y", axisLabelFormatter = percent_js, valueFormatter = percent_js)

External services sidebar

Screenshot showing the external services sidebar.

Screenshot showing the external services sidebar.

Each point in the graph below is a 7-day rolling average, which makes trends easier to see. If you would like to see the raw daily data instead, change the number in the bottom-left corner of the graph to 1.

# Daily percentage of sessions, per project, that logged at least one
# sidebar-hide, sidebar-type or sidebar-click event. The projects listed in
# `excluded_projects` are left out of the chart.
excluded_projects <- c(
  "MediaWiki", "Wikimedia Chapter", "Wikimedia Foundation wiki",
  "Wikinews", "Wiktionary", "Meta wiki", "Wikidata"
)
sidebar_events <- c("sidebar-hide", "sidebar-type", "sidebar-click")

# JavaScript formatter shared by the y-axis labels and the hover values:
# rounds to two decimals and appends a percent sign.
percent_js <- "function(x) { return (Math.round(x*100)/100) + '%'; }"

# One row per date, one column per project; missing combinations become 0.
sidebar_daily <- events[
  !project %in% excluded_projects,
  list(interacted = any(event %in% sidebar_events)),
  by = c("project", "wiki", "user_token", "date")
] %>%
  .[, list(`% interacted` = 100 * sum(interacted) / .N), by = c("date", "project")] %>%
  tidyr::spread(project, `% interacted`, fill = 0)

# Plot everything but the date column as a date-indexed series, smoothed with
# a 7-period roller.
xts::xts(sidebar_daily[, -1], order.by = sidebar_daily$date) %>%
  dygraphs::dygraph(
    main = "Proportion of sessions that have interacted with the sidebar",
    xlab = "Date",
    ylab = "% of sessions",
    width = 900
  ) %>%
  dygraphs::dyOptions(drawPoints = TRUE, pointSize = 2) %>%
  dygraphs::dyLegend(labelsDiv = "sidebar-legend", width = 900) %>%
  dygraphs::dyAxis("x", axisLabelFormatter = polloi::custom_axis_formatter) %>%
  dygraphs::dyAxis("y", axisLabelFormatter = percent_js, valueFormatter = percent_js) %>%
  dygraphs::dyRoller(rollPeriod = 7)
# Median number of sessions per day for each (type, service) pair, reshaped to
# one row per service and one column per type (0 where a pair never occurs).
service_sessions <- events[
  !is.na(type) & !is.na(service),
  list(sessions = data.table::uniqueN(user_token)),
  by = c("date", "type", "service")
] %>%
  .[, list(`sessions/day` = ceiling(median(sessions))), by = c("type", "service")] %>%
  tidyr::spread(type, `sessions/day`, fill = 0)

# Format whichever type columns are actually present rather than a fixed list,
# so the table still renders when a type is absent from the selected period.
type_columns <- setdiff(names(service_sessions), "service")

DT::datatable(
  service_sessions,
  caption = paste("This table shows the median number of sessions per day that have clicked on external services (from the last", params$days, "days)."),
  filter = "top",
  extensions = "Buttons",
  options = list(
    pageLength = 20, autoWidth = TRUE, language = list(search = "Filter:"),
    order = list(list(3, "desc")), dom = "Bfrtip", buttons = c("copy", "csv")
  )
) %>%
  # Plain numbers: no currency symbol, no decimals.
  DT::formatCurrency(columns = type_columns, currency = "", digits = 0)

External layers

Map of Amsterdam from English Wikivoyage with Mapnik as the base, external layer.

Map of Amsterdam from English Wikivoyage with Mapnik as the base, external layer.

# Only events on or after this date are considered for external layers.
# NOTE(review): presumably the date external-layer events started being
# logged -- confirm.
exlayer_start <- as.Date("2017-08-04")

# Denominator: number of distinct (wiki, user_token) sessions in that period.
n_sessions <- data.table::uniqueN(events[date >= exlayer_start, c("wiki", "user_token")])

# Share of those sessions that triggered each external-layer event per layer.
external_layers <- events[
  date >= exlayer_start &
    grepl("exlayer$", event) &
    !is.na(layer),
  list(events = .N),
  by = c("wiki", "user_token", "layer", "event")
] %>%
  # After the step above every row is one distinct (wiki, user_token) session
  # for its (event, layer), so the row count is the session count. Counting
  # rows keeps the numerator on the same (wiki, user_token) basis as
  # n_sessions instead of counting user tokens alone.
  .[, list(sessions = .N / n_sessions), by = c("event", "layer")] %>%
  # Human-readable labels; any other event ending in "exlayer" becomes NA.
  dplyr::mutate(event = dplyr::case_when(
    event == "wv-select-exlayer" ~ "Switched to external (non-WMF) layer",
    event == "wv-show-exlayer" ~ "External layer was enabled",
    event == "wv-hide-exlayer" ~ "External layer was hidden"
  ))
# Dodged bar chart: proportion of sessions per external layer, one bar per
# kind of interaction.
# external_layers only covers events from 2017-08-04 onward, so the subtitle
# reports that restricted period rather than the full range of `events`.
exlayer_from <- max(min(events$date), as.Date("2017-08-04"))
ggplot(external_layers, aes(x = layer, y = sessions, fill = event)) +
  geom_col(position = "dodge") +
  scale_y_continuous(labels = scales::percent_format()) +
  scale_fill_brewer("Interaction", palette = "Set1") +
  labs(
    x = "Layer", y = "Proportion of total sessions",
    title = "Usage of external layers",
    subtitle = glue::glue("Using data from {exlayer_from} through {max(events$date)}"),
    caption = "Does not include/show users who have set external layers as defaults."
  ) +
  wmf::theme_min(base_family = "Source Sans Pro")

Page Discovery

Screenshot showing how Maps can be used to discover articles on Wikipedia via interwiki links.

Screenshot showing how Maps can be used to discover articles on Wikipedia via interwiki links.

There are three kinds of links a user can click on a map:

  • external links take a user outside of Wikimedia projects
  • internal links take a user to a page on the same wiki
  • interwiki links take a user to a page on a different wiki
# A "discovery" event means the user went to another page thanks to the map.

# Denominator: distinct user tokens per language, project and feature.
totals <- events[
  , list(sessions = data.table::uniqueN(user_token)),
  by = c("language", "project", "feature")
]

# Numerator: distinct user tokens with a discovery event, per destination.
# Dividing by the matching total gives the proportion of sessions; the result
# is reshaped to one column per destination (0 where there were none).
disco <- events[
  event == "discovery",
  list(sessions = data.table::uniqueN(user_token)),
  by = c("language", "project", "destination", "feature")
] %>%
  dplyr::left_join(
    totals,
    by = c("language", "project", "feature"),
    suffix = c("_discovery", "_total")
  ) %>%
  dplyr::mutate(prop = sessions_discovery / sessions_total) %>%
  dplyr::select(feature, language, project, destination, prop) %>%
  tidyr::spread(destination, prop, fill = 0)
# Interactive table of the discovery proportions with copy/CSV export buttons,
# initially sorted by column index 6 in descending order.
disco_table <- DT::datatable(
  disco,
  caption = paste("This table shows the % of sessions that clicked on links to other pages from a map (via maplink/mapframe) across the last", params$days, "days."),
  filter = "top",
  extensions = "Buttons",
  options = list(
    pageLength = 20,
    autoWidth = TRUE,
    language = list(search = "Filter:"),
    order = list(list(6, "desc")),
    dom = "Bfrtip",
    buttons = c("copy", "csv")
  )
)

# Show the three destination columns as percentages with one decimal.
disco_table %>%
  DT::formatPercentage(columns = c("external", "internal", "interwiki"), digits = 1)

Maps on Commons (mapframes specifically) help users discover new content more than maps on other wikis do.

Appendix

Setup

This report was compiled using RMarkdown, knitr, and an open SSH tunnel for connecting to our databases:

# Forward local port 3307 to analytics-store.eqiad.wmnet:3306 through stat6;
# -N keeps the tunnel open without running a remote command.
ssh -N stat6 -L 3307:analytics-store.eqiad.wmnet:3306